In [48]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rpy2
In [8]:
# Read Auto.csv (resolved relative to the working directory) into a DataFrame.
auto = pd.read_csv(filepath_or_buffer='Auto.csv')
In [11]:
# Preview the first five rows of the frame.
auto.head(n=5)
Out[11]:
In [13]:
# Summary statistics for the numeric columns of the frame.
auto.describe()
Out[13]:
Take a quick look at the factors in Auto.csv
In [26]:
# Pairwise scatter plots of the frame's numeric columns, with a kernel-density
# estimate on the diagonal. The old `pandas.tools.plotting` namespace has been
# removed from pandas; `pandas.plotting` is the supported location
# (available since pandas 0.20).
axes = pd.plotting.scatter_matrix(auto, alpha=0.2, figsize=(14, 10), diagonal='kde')
Add an mpg/weight column
In [33]:
# Store mpg divided by weight as a new column on the frame.
auto['mpg_per_weight'] = auto['mpg'].div(auto['weight'])
# Print the two inputs next to the derived ratio for the first rows.
print(auto.loc[:, ['mpg', 'weight', 'mpg_per_weight']].head())
# Line plot of the ratio against the frame's index.
auto['mpg_per_weight'].plot()
Out[33]:
In [34]:
# Show the first two rows of the frame.
auto.head(n=2)
Out[34]:
In [38]:
%load_ext rmagic
In [44]:
# Push the pandas DataFrame `auto` into the R session so that R code run
# through the %R / %%R magics can refer to it by the same name.
%Rpush auto
In [61]:
# In R: pass the pushed `auto` object through data.frame() and bind the result to `auto2`.
%R auto2 = data.frame(auto);
In [60]:
%%R
# Show the first two rows of auto2. The call was misspelled as `pint`,
# which is not an R function, so the cell failed instead of printing.
print(head(auto2, 2))
In [50]:
# In R: draw a pairs() scatter-plot matrix of the `auto` object.
%R pairs(auto)
In [69]:
# Histogram of 15,000 standard-normal draws.
# A dedicated, seeded generator keeps the figure identical across
# Restart & Run All instead of changing on every execution, and avoids
# touching NumPy's global random state.
hist_rng = np.random.RandomState(0)
data = hist_rng.randn(15000)
plt.hist(data, bins=30)
plt.ylabel('Counts')
plt.title('The Gaussian Distribution')
Out[69]:
In [80]:
# Two independent standard-normal samples plotted against each other.
# The generator is seeded so that x and y (which later cells reuse) are the
# same on every fresh run; the seed differs from the histogram cell's so
# the samples are not a repeat of that data.
scatter_rng = np.random.RandomState(1)
x = scatter_rng.randn(5000)
y = scatter_rng.randn(5000)
plt.plot(x, y, 'ro')  # 'ro' = red circle markers, no connecting line
plt.xlabel('x')
plt.ylabel('y')
plt.title('Scatter plot: Normal vs. Normal')
Out[80]:
In [77]:
# Scatter plot of y against x using matplotlib's dedicated scatter call.
plt.scatter(x=x, y=y)
Out[77]:
In [83]:
# Draw one box per sample on the current axes, then title those axes.
ax = plt.gca()
ax.boxplot([x, y])
ax.set_title('Two box plots, side by side')
Out[83]:
In [76]:
# 50 evenly spaced points on [0, 10] (50 is linspace's default count).
s = np.linspace(0, 10, num=50)
# Plot s squared against s as a red dashed line.
plt.plot(s, np.square(s), 'r--')
Out[76]:
In [88]:
# Figure 1: mpg against weight, drawn with blue circle markers.
# Each plot gets its own explicitly created axes. The previous version called
# plt.figure() before DataFrame.hist, but hist opens a figure of its own when
# no axes are passed, so that call left a stray empty figure in the output.
fig_scatter, ax_scatter = plt.subplots()
auto.plot(x='weight', y='mpg', style='bo', ax=ax_scatter)
ax_scatter.set_title('Scatterplot: Mpg vs. Weight')

# Figure 2: distribution of mpg; set_title replaces the default column-name title.
fig_hist, ax_hist = plt.subplots()
auto.hist('mpg', ax=ax_hist)
ax_hist.set_title('Histogram of mpg')
Out[88]:
In [89]:
# `pandas.tools.plotting` has been removed from pandas; scatter_matrix is
# importable from `pandas.plotting` (available since pandas 0.20).
from pandas.plotting import scatter_matrix
# Pairwise scatter plots restricted to three columns; the returned axes
# array is bound to `_` so it is not echoed as cell output.
_ = scatter_matrix(auto[['mpg', 'cylinders', 'displacement']], figsize=(14, 10))